import random

from gxl_ai_utils.utils import utils_file
import tqdm
# Rebuild the shard list: start from the entries of the existing list file,
# drop the ones under the old wenetspeech_new_all directory, add the tar paths
# gathered from the revised directory, shuffle, and write a new list file.
path_list = utils_file.load_list_file_clean('./gxl_all_new_wenetspeech_fix.list')

# Keep only the entries that do NOT start with the old shard directory prefix.
# tqdm wraps the iteration purely to display progress.
res_data_list = [
    path_i
    for path_i in tqdm.tqdm(path_list, total=len(path_list))
    if not path_i.startswith('/home/work_nfs14/xlgeng/asr_data_shard/wenetspeech_new_all')
]

# Presumably returns the paths of the 'tar' files found under input_dir --
# TODO confirm against utils_file.get_file_path_list_for_wav_dir.
input_dir = '/home/work_nfs8/hwang/shards/huawei_cn_en/cn3/wenetspeech_revised'
tar_list = utils_file.get_file_path_list_for_wav_dir(input_dir, suffix='tar')
res_data_list += tar_list

# Shuffle in place (unseeded, so the order differs from run to run) and
# write the combined list out.
random.shuffle(res_data_list)
utils_file.write_list_to_file(res_data_list, './gxl_all_new_wenetspeech_fix_2.list')